import scvelo as scv
import pandas
import matplotlib
import loompy as lp
scv.settings.verbosity = 3 # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True # set max width size for presenter view
scv.settings.set_figure_params('scvelo') # for beautified visualization
matplotlib.rcParams['figure.dpi']= 300 # High quality graphs
wt = scv.read('/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/CellRanger_WT.loom', cache=True, index_col="CellID")
ko = scv.read(filename='/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/CellRanger_MafKO.loom',
cache=True,
index_col="CellID")
combined = scv.read('/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/combined.loom', cache=True)
combined.var_names_make_unique
adata = combined
# Read the corrected barcodes for the combined loom file
ko_bc = pandas.read_csv('/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/ko_corrected.barcodes.loom.csv',
index_col='x',
header=0)
wt_bc = pandas.read_csv('/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/wt_corrected.barcodes.loom.csv',
index_col='x',
header=0)
wt.obs['original.barcode'] = wt.obs.index
ko.obs['original.barcode'] = ko.obs.index
wt.obs.index = wt_bc.index
ko.obs.index = ko_bc.index
# Read the metadata to subset the loom file for cells in Seurat object
df = pandas.read_csv('/Volumes/Samsung_T5/JesusProject/LOOMs_MafKO/metadata.seurat.csv',
index_col='CellID',
header=0)
# Get the barcodes common between metadata and the scvelo object and subset by index
barcodes = adata.obs.index.tolist()
cells = df.index.tolist()
both = set(barcodes).intersection(cells)
ind = [barcodes.index(x) for x in both]
adata_subset = adata[ind,:]
ind2 = [cells.index(x) for x in both]
df_subset = df.loc[both]
# Add the relevant information from the metadata
adata_subset.obs['metadataCellID'] = df_subset.index # To check consistent cbind
adata_subset.obs['clusters'] = df_subset['seurat_clusters'].astype('category')
adata_subset.obs['cluster_names'] = df_subset['cluster_name'].astype('category')
adata_subset.obs['genotype'] = df_subset['Genotype'].astype('category')
adata_subset.obs['sample'] = df_subset['Sample'].astype('category')
adata_subset.obs['UMAP_1'] = df_subset['UMAP1']
adata_subset.obs['UMAP_2'] = df_subset['UMAP2']
scv.pl.proportions(adata=adata_subset, groupby='sample')
scv.pl.proportions(adata=adata_subset, groupby='genotype')
# Replace the UMAP coordinates for the ones in the Seurat object
umaps = ['UMAP_1','UMAP_2']
coords = adata_subset.obs[umaps]
adata_subset.obsm['X_umap'] = coords.values
adata_subset.obsm['X_umap']
# Filter and normalize
scv.pp.filter_and_normalize(adata_subset, min_shared_counts=100, n_top_genes=2000)
scv.pp.moments(adata_subset, n_pcs=30, n_neighbors=30)
# Recover gene dynamics
scv.tl.recover_dynamics(adata_subset)
scv.tl.velocity(adata_subset, mode='dynamical')
scv.tl.velocity_graph(adata_subset)
#adata_subset.write('path_to_folder/MafKO.h5ad') # Save the object
# adata_subset = scv.read('path_to_folder/MafKO.h5ad') # Load back the object
scv.pl.velocity_embedding_stream(adata_subset, basis='umap', color='cluster_names', min_mass=0, save = 'stream_all.svg')
scv.tl.latent_time(adata_subset)
scv.pl.scatter(adata_subset, color='latent_time', color_map='gnuplot', size=80)
top_genes = adata_subset.var['fit_likelihood'].sort_values(ascending=False).index[:300]
scv.pl.heatmap(adata_subset, var_names=top_genes, sortby='latent_time', n_convolve=100, col_color=['cluster_names'])
scv.tl.score_genes_cell_cycle(adata_subset)
scv.pl.scatter(adata_subset, color_gradients=['S_score', 'G2M_score'], smooth=True, perc=[5, 95])
scv.tl.velocity_pseudotime(adata_subset)
scv.pl.scatter(adata_subset, color='velocity_pseudotime', cmap='gnuplot', figsize=(20,10))
scv.pl.heatmap(adata_subset, var_names=top_genes, sortby='velocity_pseudotime', n_convolve=100, col_color=['cluster_names'], yticklabels=True)
adata_subset.uns['neighbors']['distances'] = adata_subset.obsp['distances']
adata_subset.uns['neighbors']['connectivities'] = adata_subset.obsp['connectivities']
scv.tl.paga(adata_subset, groups='cluster_names')
df = scv.get_df(adata_subset, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')
scv.pl.paga(adata_subset, basis='umap',color = 'cluster_names', size=50, alpha=.025,
min_edge_width=1, node_size_scale=1.5)
kwargs = dict(linewidth=2, add_linfit=True, frameon=True)
scv.pl.scatter(adata_subset, basis=['Sema3f','Cdh5','Prg3','Ptgr1','Maf'], add_outline='fit_diff_kinetics', **kwargs, figsize= (20,20))
# Split by WT vs MUT
wt = adata_subset[adata_subset.obs['genotype']== 'WT']
ko = adata_subset[adata_subset.obs['genotype']== 'KO']
scv.tl.recover_dynamics(wt)
scv.tl.recover_dynamics(ko)
scv.pp.neighbors(wt)
scv.pp.neighbors(ko)
scv.tl.velocity_graph(wt)
scv.tl.velocity_graph(ko)
scv.pl.velocity_embedding_stream(wt, basis='umap', n_neighbors = 100, save = 'stream_wt.svg')
scv.pl.velocity_embedding_stream(ko, basis='umap', n_neighbors = 100, save = 'stream_ko.svg')
scv.pl.velocity_embedding_stream(adata_subset, basis='umap',color='#C1C1C1', n_neighbors = 100, save = 'all.svg')
# Subset for those clusters that belong to endothelial cell types
clusters_keep = [11,21,1,0,13,4,17,18]
adata_endo = adata_subset[adata_subset.obs.clusters.isin(clusters_keep)].copy()
# Split by WT vs MUT
wt = adata_endo[adata_endo.obs['genotype']== 'WT']
ko = adata_endo[adata_endo.obs['genotype']== 'KO']
# Filter and normalize
scv.tl.recover_dynamics(wt)
scv.tl.recover_dynamics(ko)
scv.pp.neighbors(wt)
scv.pp.neighbors(ko)
scv.tl.velocity_graph(wt)
scv.tl.velocity_graph(ko)
scv.pl.velocity_embedding_stream(wt, basis='umap', n_neighbors = 100, save = 'stream_wt_endo.svg')
scv.pl.velocity_embedding_stream(ko, basis='umap', n_neighbors = 100, save = 'stream_ko_endo.svg')
# Paga graph abstraction for WT and MUT separately
wt.uns['neighbors']['distances'] = wt.obsp['distances']
wt.uns['neighbors']['connectivities'] = wt.obsp['connectivities']
scv.tl.paga(wt, groups='cluster_names')
df = scv.get_df(wt, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')
scv.pl.paga(wt, basis='umap',color = 'cluster_names', size=50, alpha=.025,
min_edge_width=0, node_size_scale=0.5, save = 'paga_wt_endo.svg')
ko.uns['neighbors']['distances'] = ko.obsp['distances']
ko.uns['neighbors']['connectivities'] = ko.obsp['connectivities']
scv.tl.paga(ko, groups='cluster_names')
df = scv.get_df(ko, 'paga/transitions_confidence', precision=2).T
df.style.background_gradient(cmap='Blues').format('{:.2g}')
scv.pl.paga(ko, basis='umap',color = 'cluster_names', size=50, alpha=.025,
min_edge_width=0, node_size_scale=0.5, save = 'paga_ko_endo.svg')
scv.pl.velocity_embedding_stream(adata_endo, basis='umap',color='#C1C1C1', n_neighbors = 100, save = 'endo_all.svg')
cluster_colors = ('#4292c6','#08519c','#fe9929','#ec7014','#fec44f','#cc4c02','#9ecae1','#8b3d2d')
scv.pl.velocity_embedding_stream(adata_endo, basis='umap', n_neighbors = 100, palette=cluster_colors, save = 'stream_endo_all_clusters.svg')
scv.tl.velocity_pseudotime(adata_endo)
scv.pl.scatter(adata_endo, color='velocity_pseudotime', cmap='gnuplot', save = 'pseudotime_endo.svg')
scv.pl.velocity_embedding_stream(adata_endo, basis='umap',palette=cluster_colors, save = 'subset_sinusoids_rightColor.svg')
import scipy
transition=scv.utils.get_transition_matrix(adata_endo)
df = pandas.DataFrame(data=scipy.sparse.csr_matrix.todense(transition))
df.to_csv('/Volumes/Samsung_T5/JesusProject/ProcessedData/Transitions_EndoOnly.csv', index=True)
top_genes = adata_endo.var['fit_likelihood'].sort_values(ascending=False).index[:300]
scv.pl.heatmap(adata_endo, var_names=top_genes, sortby='velocity_pseudotime', n_convolve=100, col_color=['genotype'], yticklabels=True)
scv.tl.differential_kinetic_test(adata_endo, var_names=var_names, groupby='genotype')
top_genes1 = wt.var['fit_likelihood'].sort_values(ascending=False).index[:300]
top_genes2 = ko.var['fit_likelihood'].sort_values(ascending=False).index[:300]
top_genes = top_genes1.union(top_genes2)
top_genes = top_genes1.union(top_genes2))